### This script plots the analyses to compare different ways to incorporate the PDG
### into the Zonation analyses (Fig. 4)

### Libraries
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)

### Read data

## Read output data generated by the script spatial_analyses_zonationVSproxiesdivgen_all_ms.R
## this shows the area per taxa and proxies of genetic diversity within it 
## considering the Zonation solution for 20% of the country considering only 
## taxa distribution models (no habitat, etc).
sols_summary_spp<-read.csv("../data/comparations_output/sols_summary_spp.txt")

# check data
head(sols_summary_spp)
##                Solution Richness  shannon   simpson   Area Prop_to_AreaSP
## 1                ENM_sp       64 3.216956 0.9341559 473887      1.0000000
## 2          Scenario_SDM       58 2.908383 0.9179912 127904      0.2699040
## 3       Scenario_SDM_LZ       59 2.929987 0.9210733 127082      0.2681694
## 4      Scenario_SDM_PGD       61 3.565616 0.9677778 142830      0.3014010
## 5   Scenario_SDM_vs_PGD       64 3.718850 0.9720398 124221      0.2621321
## 6 Scenario_SDM_PGD_ADMU       63 3.182794 0.9418937 129665      0.2736201
##   mean.prop median.prop Dist.to.Optimun                            sp
## 1        NA          NA             0.0 Capsicum annuum glabriusculum
## 2 0.3786268   0.2580767        100555.5 Capsicum annuum glabriusculum
## 3 0.4083096   0.3092418        104980.3 Capsicum annuum glabriusculum
## 4 0.5480085   0.4495986        104108.5 Capsicum annuum glabriusculum
## 5 0.6525528   0.6421699        106906.7 Capsicum annuum glabriusculum
## 6 0.4407066   0.3329768        100121.0 Capsicum annuum glabriusculum
### Analyses and plots

## Check mean proportion of taxa area and mean proportion of proxies within it, per solution
group_by(sols_summary_spp, Solution) %>% 
  summarise(., mean.prop.sp.area =  mean(Prop_to_AreaSP) , mean.prop.proxies = mean(mean.prop))
## # A tibble: 6 × 3
##   Solution              mean.prop.sp.area mean.prop.proxies
##   <chr>                             <dbl>             <dbl>
## 1 ENM_sp                            1                NA    
## 2 Scenario_SDM                      0.442             0.514
## 3 Scenario_SDM_LZ                   0.395             0.530
## 4 Scenario_SDM_PGD                  0.338             0.558
## 5 Scenario_SDM_PGD_ADMU             0.448             0.641
## 6 Scenario_SDM_vs_PGD               0.386             0.746
### Plots

## Violin plots

# Simpson

filter(sols_summary_spp, Solution!="ENM_sp") %>%
  ggplot(., aes(x=Solution, y=simpson, color=Solution)) + geom_violin() + geom_jitter(size = 0.3) +
  stat_summary(fun.y=mean, geom="point", color="red") +
  theme(axis.text.x = element_blank()) +
  scale_y_continuous(name="Simpson Index")
## Warning: `fun.y` is deprecated. Use `fun` instead.

# Area taxon conserved

filter(sols_summary_spp, Solution!="ENM_sp") %>%
  ggplot(., aes(x=Solution, y=Prop_to_AreaSP, color=Solution)) + geom_violin() + geom_jitter(size = 0.3) +
  stat_summary(fun.y=mean, geom="point", color="red") +
  theme(axis.text.x = element_blank()) +
  scale_y_continuous(name="Proportion of distribution of the taxon conserved")
## Warning: `fun.y` is deprecated. Use `fun` instead.

# Mean prop area proxies by taxon conserved

plot_b <-filter(sols_summary_spp, Solution!="ENM_sp") %>%
  ggplot(., aes(x=Solution, y=mean.prop, color=Solution)) + geom_violin() + geom_jitter(size = 0.3) +
  stat_summary(fun.y=mean, geom="point", color="red") +
  theme(axis.text.x = element_blank()) +
  scale_y_continuous(name="Mean proportion of the area of each proxy conserved by taxon")
## Warning: `fun.y` is deprecated. Use `fun` instead.
plot_b

# Median prop area proxies by taxon ponserved

filter(sols_summary_spp, Solution!="ENM_sp") %>%
  ggplot(., aes(x=Solution, y=median.prop, color=Solution)) + geom_violin() + geom_jitter(size = 0.3) +
  stat_summary(fun.y=mean, geom="point", color="red") +
  theme(axis.text.x = element_blank()) +
  scale_y_continuous(name="Median proportion of the area of each proxy conserved by taxon")
## Warning: `fun.y` is deprecated. Use `fun` instead.

# Median and mean together

filter(sols_summary_spp, Solution!="ENM_sp") %>%
  gather(., SummaryStat, value, -Solution, -c(Richness:Prop_to_AreaSP), -Dist.to.Optimun, -sp) %>%
  ggplot(., aes(x=interaction(SummaryStat, Solution), y=value, color=SummaryStat)) + geom_violin() + geom_jitter(size = 0.1) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Proportion of the distribution of the taxon conserved vs MEAN proportion of each proxy conserved by taxon

filter(sols_summary_spp, Solution!="ENM_sp") %>%
  ggplot(., aes(x=Prop_to_AreaSP, y=mean.prop, color=Solution)) + geom_point() +
  scale_x_continuous(name="Proportion of distribution of the taxon conserved") +
  scale_y_continuous(name="Mean proportion of the area of each proxy conserved by taxon")

# Proportion of the distribution of the taxon conserved vs MEADIAN proportion of each proxy conserved by taxon

filter(sols_summary_spp, Solution!="ENM_sp") %>%
  ggplot(., aes(x=Prop_to_AreaSP, y=median.prop, color=Solution)) + geom_point() +
  scale_x_continuous(name="Proportion of distribution of the taxon conserved") +
  scale_y_continuous(name="Median proportion of the area of each proxy conserved by taxon")

## with regression lines

plot_a <-filter(sols_summary_spp, Solution!="ENM_sp") %>%
  # plot points
  ggplot(., aes(x=Prop_to_AreaSP, y=mean.prop, color=Solution)) + geom_point(size=0.7) +
  scale_x_continuous(name="Proportion of taxa distribution (%)") +
  scale_y_continuous(name="Mean proportion of area of proxies of genetic differentiation by taxa (%)", 
                     breaks = seq(0, 1, by = 0.25), expand = c(0, 0)) +
  # plot fitted curve
  geom_smooth(method=loess, aes(fill=Solution)) 
plot_a
## `geom_smooth()` using formula 'y ~ x'

### Plot for paper figure (Fig 4):


plot_a <- plot_a +
  # nicer background
  theme_bw()+ theme(panel.border = element_blank(), panel.grid.major = element_blank(),
                    panel.grid.minor = element_blank(), 
                    axis.line = element_line(colour = "black")) +
  
  # nicer legend (fill and color are needed becasue we are plotting both points smooth and points)
  scale_fill_discrete(name = "Scenarios",
                      labels = c("SDM (n=116)", 
                                 "SDM+LZ (n=143)", "SDM+PGD (n=218)",
                                 "SDM*PGD (n=5004)", "SDM and PGD as ADMU (n=117)")) +
  scale_color_discrete(name = "Scenarios",
                       labels = c("SDM (n=116)", 
                                  "SDM+LZ (n=143)", "SDM+PGD (n=218)",
                                  "SDM*PGD (n=5004)", "SDM and PGD as ADMU (n=117)")) +
  theme(legend.position = c(.99, 0.01), legend.justification = c("right", "bottom")) +
  
  # title
  labs(title="a)") +

  # larger text
  theme(
  plot.title = element_text(size=14, face="bold"),
  axis.title = element_text(size=14),
  axis.text = element_text(size=13),
  legend.text = element_text(size=13),
  legend.title = element_text(size=13, face="bold"))

plot_a
## `geom_smooth()` using formula 'y ~ x'

plot_b <- plot_b +
  # nicer background
  theme_bw()+ theme(panel.border = element_blank(), panel.grid.major = element_blank(),
                    panel.grid.minor = element_blank(), 
                    axis.line = element_line(colour = "black")) +
  # no legend 
  theme(legend.position = "none") +
  
  # nicar x names
  scale_x_discrete(name="Scenarios",
                   labels=c("SDM", 
                            "SDM+LZ", "SDM+PGD",
                            "SDM*PGD", "as ADMU")) +
  # title
  labs(title="b)") +
  
  # larger text
  theme(
    plot.title = element_text(size=14, face="bold"),
    axis.title = element_text(size=14),
    axis.text = element_text(size=13))

plot_b  

## Plot Together


# Multiple plot function taken from http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols:   Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)
  
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  
  numPlots = length(plots)
  
  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  
  if (numPlots==1) {
    print(plots[[1]])
    
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    
    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

multiplot(plot_a, plot_b, cols=2)
## `geom_smooth()` using formula 'y ~ x'

## Supplementary analysis suggested during review:

# Read IUCN category per taxa
iucn<-read.csv("../data/spatial/Zonation_final_solutions/IUCN_threat_category.csv")

# change names to fit sols_summary_spp names
iucn<-mutate(iucn, Nombre_=gsub("_", " ", iucn$Nombre_)) %>%

# keep only needed variables
select(.,-Taxa) 

# add iucn data to Zonation solutions
sols_summary_spp<-left_join(sols_summary_spp, iucn, by =c("sp" = "Nombre_")) %>%

# Get Genus
separate(col=sp, 
         into=c("Genus", "Species"), 
         sep=" ", extra="merge", remove=FALSE)

# For the SDM*PGD curve of Fig 4a, get which taxa have less than 60% PGD represented
filter(sols_summary_spp, Solution=="Scenario_SDM_vs_PGD", mean.prop<0.60) %>% 
  select(., mean.prop, Area, IUCN.threat.category, sp)
##    mean.prop   Area IUCN.threat.category                          sp
## 1  0.3748263  11810                   DD           Cucurbita cordata
## 2  0.4883239  32037                   LC          Cucurbita digitata
## 3  0.5813091  12183                   LC       Cucurbita lundelliana
## 4  0.5175327  13114                   DD           Cucurbita palmata
## 5  0.3760386  17106                   VU        Gossypium davidsonii
## 6  0.5506088  53446                   NT            Persea podadenia
## 7  0.5892198  30141                   LC     Phaseolus angustissimus
## 8  0.5311420  31513                   LC        Phaseolus filiformis
## 9  0.5789563 144453                   LC Phaseolus lunatus silvester
## 10 0.5429136  87717                 <NA>             Physalis acolia
## 11 0.5492944 277396                   LC           Physalis angulata
## 12 0.5622299  46563                   LC        Physalis crassifolia
## 13 0.5918319 316674                   LC        Physalis hederifolia
## 14 0.5959233 328147                   LC             Physalis patula
## 15 0.5527966 305552                   LC           Physalis pruinosa
## 16 0.5600602 401532                   LC          Physalis pubescens
## 17 0.5835505  71380                   LC       Tripsacum lanceolatum
# For the SDM*PGD curve of Fig 4a, get which taxa have >90% PGD represented
filter(sols_summary_spp, Solution=="Scenario_SDM_vs_PGD", mean.prop>0.90) %>% 
  select(., mean.prop, Area, IUCN.threat.category, sp)
##    mean.prop  Area IUCN.threat.category                     sp
## 1  0.9119520 21704                   EN   Capsicum lanceolatum
## 2  0.9348443 12008                   VU Gossypium gossypioides
## 3  0.9530186 14472                   EN          Persea albida
## 4  0.9010968  8179                   LC        Persea caerulea
## 5  0.9115334 10910                   VU Persea donnell smithii
## 6  0.9036658 27794                   EN      Persea pallescens
## 7  0.9131448  9487                   LC       Persea vesticula
## 8  0.9184310 22707                   LC   Phaseolus jaliscanus
## 9  1.0000000   341                   LC        Physalis ignota
## 10 0.9397357  4969                   LC Solanum agrimonifolium
## 11 1.0000000   142                   VU         Solanum clarum
## 12 0.9384119 24517                   LC       Solanum hougasii
## 13 0.9522770 10185                   NT       Solanum trifidum
## 14 0.9728131  4788                   EN      Zea diploperennis
# Examine if there is a pattern on which have more or less PGD represented

# By genus
plot_genus <-filter(sols_summary_spp, Solution=="Scenario_SDM_vs_PGD") %>%
  # plot points
  ggplot(., aes(x=Prop_to_AreaSP, y=mean.prop, color=Genus)) + geom_point(size=1.1) +
  scale_x_continuous(name="Proportion of taxa distribution (%)") +
  scale_y_continuous(name="Mean proportion of area of proxies of genetic differentiation by taxa (%)", 
                     breaks = seq(0, 1, by = 0.25), expand = c(0, 0)) +
  theme_bw() 
plot_genus

# by IUCN
iucn.cols<-c("#D81E05", "#FC7F3F", "#F9E814", "#CCE226", "#60C659", "#D1D1C6") # respectively for "CR", "EN", "VU", "NT", "LC", "DD"

plot_iucn <-filter(sols_summary_spp, Solution=="Scenario_SDM_vs_PGD") %>%
  # plot points
  ggplot(., aes(x=Prop_to_AreaSP, y=mean.prop, color=IUCN.threat.category)) + geom_point(size=1.1) +
  scale_x_continuous(name="Proportion of taxa distribution (%)") +
  scale_y_continuous(name="Mean proportion of area of proxies of genetic differentiation by taxa (%)", 
                     breaks = seq(0, 1, by = 0.25), expand = c(0, 0)) +
  scale_color_manual(values= iucn.cols,
                     breaks= c("CR", "EN", "VU", "NT", "LC", "DD"),
                     name= "Threat category") +
  theme_bw()+ labs(title="a)")
plot_iucn

# plot by area
plot_area <-filter(sols_summary_spp, Solution=="Scenario_SDM_vs_PGD") %>%
  # plot points
  ggplot(., aes(x=Prop_to_AreaSP, y=mean.prop, color=Area)) + geom_point(size=1.1) +
  scale_x_continuous(name="Proportion of taxa distribution (%)") +
  scale_y_continuous(name="Mean proportion of area of proxies of genetic differentiation by taxa (%)", 
                     breaks = seq(0, 1, by = 0.25), expand = c(0, 0)) +
  theme_bw() + labs(title="b)")
plot_area

multiplot(plot_iucn, plot_area, cols=1)

# Get session info
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] tidyr_1.2.0   dplyr_1.0.9   ggplot2_3.3.6
## 
## loaded via a namespace (and not attached):
##  [1] bslib_0.3.1      compiler_4.2.1   pillar_1.7.0     jquerylib_0.1.4 
##  [5] highr_0.9        tools_4.2.1      digest_0.6.29    lattice_0.20-45 
##  [9] nlme_3.1-157     jsonlite_1.8.0   evaluate_0.15    lifecycle_1.0.1 
## [13] tibble_3.1.7     gtable_0.3.0     mgcv_1.8-40      pkgconfig_2.0.3 
## [17] rlang_1.0.3      Matrix_1.4-1     DBI_1.1.3        cli_3.3.0       
## [21] rstudioapi_0.13  yaml_2.3.5       xfun_0.31        fastmap_1.1.0   
## [25] withr_2.5.0      stringr_1.4.0    knitr_1.39       generics_0.1.3  
## [29] sass_0.4.1       vctrs_0.4.1      tidyselect_1.1.2 glue_1.6.2      
## [33] R6_2.5.1         fansi_1.0.3      rmarkdown_2.14   farver_2.1.1    
## [37] purrr_0.3.4      magrittr_2.0.3   splines_4.2.1    scales_1.2.0    
## [41] htmltools_0.5.2  ellipsis_0.3.2   assertthat_0.2.1 colorspace_2.0-3
## [45] labeling_0.4.2   utf8_1.2.2       stringi_1.7.6    munsell_0.5.0   
## [49] crayon_1.5.1